Libraries & external code needed:
Set working directory where the script is
if (Sys.getenv("USER")=="JQ") {
setwd("/Users/JQ/Documents/_CODE REPOS/GitHub/Da_RNAseq")
} else if (Sys.getenv("RSTUDIO")==1) {
setwd( dirname(rstudioapi::getSourceEditorContext(id = NULL)$path) ) # gets what is in the editor
} else {
setwd(here::here())
d <- str_split(getwd(),'/')[[1]][length(str_split(getwd(),'/')[[1]])]
if (d != 'Da_RNAseq') { stop(
paste0("Could not set working directory automatically to where this",
" script resides.\nPlease do `setwd()` manually"))
}
}
getwd()
## [1] "/Users/JQ/Documents/_CODE REPOS/GitHub/Da_RNAseq"
Path to definitive images (outside repo):
figdir <- paste0(c(head(str_split(getwd(),'/')[[1]],-1),
paste0(tail(str_split(getwd(),'/')[[1]],1), '_figures')),
collapse='/')
dir.create(figdir, showWarnings = FALSE)
This gets us the DGE data from DESeq2, identified by
FlyBase/Ensembl ID and gene symbol:
# experimental design and labels
targets <- readRDS('output/targets.RDS')
# DEG data
DaKD_deg <- readRDS('output/Control_vs_DaKD.RDS')
DaOE_deg <- readRDS('output/Control_vs_DaOE.RDS')
DaDaOE_deg <- readRDS('output/Control_vs_DaDaOE.RDS')
ScOE_deg <- readRDS('output/Control_vs_ScOE.RDS')
head(
ScOE_deg %>% dplyr::select(gene_symbol, ensemblGeneID, baseMean, log2FoldChange, padj),
1
)
For daughterless knockdown:
# using `clusterProfiler` and `enrichplot`
# select reg set
daKD_up <- make_degset(DaKD_deg, up=TRUE, fc_thresh=1.5)
# ORA
daKD_up_eGO <- enrichGO(gene = daKD_up$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# with down genes
daKD_dn <- make_degset(DaKD_deg, up=FALSE, fc_thresh=1.5)
daKD_dn_eGO <- enrichGO(gene = daKD_dn$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# plot
up <- dotplot(daKD_up_eGO, showCategory=30, label_format=50) +
ggtitle("Over-represented GO terms — genes **up** in *esg > da^RNAi^*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[3]) +
theme(plot.title = element_markdown(size=rel(1.2)),
panel.grid = element_blank(),
axis.text.y = element_text(size=8, lineheight=0.5))
down <- dotplot(daKD_dn_eGO, showCategory=30, label_format=40) +
ggtitle("Over-represented GO terms — genes **down** in *esg > da^RNAi^*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[1]) +
theme(plot.title = element_markdown(),
panel.grid = element_blank(),
axis.text.y = element_text(size=9))
suppressWarnings(print(up))
suppressWarnings(print(down))
For daughterless overexpression:
# using `clusterProfiler` and `enrichplot`
# select reg set
daOE_up <- make_degset(DaOE_deg, up=TRUE, fc_thresh=1.5)
# ORA
daOE_up_eGO <- enrichGO(gene = daOE_up$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# with down genes
daOE_dn <- make_degset(DaOE_deg, up=FALSE, fc_thresh=1.5)
daOE_dn_eGO <- enrichGO(gene = daOE_dn$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# plot
up <- dotplot(daOE_up_eGO, showCategory=30, label_format=60) +
ggtitle("Over-represented GO terms — genes **up** in *esg > da*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[3]) +
theme(plot.title = element_markdown(size=rel(1.15)),
panel.grid = element_blank(),
axis.text.y = element_text(size=9))
down <- dotplot(daOE_dn_eGO, showCategory=30, label_format=60) +
ggtitle("Over-represented GO terms — genes **down** in *esg > da*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[1]) +
theme(plot.title = element_markdown(size=rel(1.15)),
panel.grid = element_blank(),
axis.text.y = element_text(size=8, lineheight=0.5))
suppressWarnings(print(up))
suppressWarnings(print(down))
For da:da overexpression:
# using `clusterProfiler` and `enrichplot`
# select reg set
dadaOE_up <- make_degset(DaDaOE_deg, up=TRUE, fc_thresh=1.5)
# ORA
dadaOE_up_eGO <- enrichGO(gene = dadaOE_up$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# with down genes
dadaOE_dn <- make_degset(DaDaOE_deg, up = FALSE, fc_thresh = 1.5)
dadaOE_dn_eGO <- enrichGO(gene = dadaOE_dn$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# plot
up <- dotplot(dadaOE_up_eGO, showCategory=30, label_format=40) +
ggtitle("Over-represented GO terms — genes **up** in *esg > da:da*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[3]) +
force_panelsizes(rows = unit(3, "in"), cols = unit(2, "in")) +
theme(plot.title = element_markdown(),
plot.title.position = 'plot',
panel.grid = element_blank(),
axis.text.y = element_text(size=9))
down <- dotplot(dadaOE_dn_eGO, showCategory=30, label_format=40) +
ggtitle("Over-represented GO terms — genes **down** in *esg > da:da*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[1]) +
force_panelsizes(rows = unit(3, "in"), cols = unit(2, "in")) +
theme(plot.title = element_markdown(),
plot.title.position = 'plot',
panel.grid = element_blank(),
axis.text.y = element_text(size=9))
tryCatch({ print(up) },
error = function (e) {
message('Error occurred when trying to plot dotplot')
print(e)})
## <simpleError in ans[ypos] <- rep(yes, length.out = len)[ypos]: replacement has length zero>
tryCatch({ print(down) },
error = function (e) {
message('Error occurred when trying to plot dotplot')
print(e)})
For scute overexpression:
# using `clusterProfiler` and `enrichplot`
# select reg set
scOE_up <- make_degset(ScOE_deg, up=TRUE, fc_thresh=1.5)
# ORA
scOE_up_eGO <- enrichGO(gene = scOE_up$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# with down genes
scOE_dn <- make_degset(ScOE_deg, up=FALSE, fc_thresh=1.5)
scOE_dn_eGO <- enrichGO(gene = scOE_dn$gene_symbol,
OrgDb = org.Dm.eg.db,
keyType = 'SYMBOL',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# plot
up <- dotplot(scOE_up_eGO, showCategory=30, label_format=60) +
ggtitle("Over-represented GO terms — genes **up** in *esg > scute*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[3]) +
theme(plot.title = element_markdown(),
panel.grid = element_blank(),
axis.text.y = element_text(size=9))
down <- dotplot(scOE_dn_eGO, showCategory=30, label_format=60) +
ggtitle("Over-represented GO terms — genes **down** in *esg > scute*") +
scale_colour_gradient(low = cet_pal(3, name='cbd1')[2],
high = cet_pal(3, name='cbd1')[1]) +
theme(plot.title = element_markdown(),
panel.grid = element_blank(),
axis.text.y = element_text(size=8, lineheight=0.5))
suppressWarnings(print(up))
suppressWarnings(print(down))
None of these plots are particularly informative, so I am going to move on to GSEA and custom gene sets.
For daughterless knockdown:
# using `clusterProfiler` and `enrichplot`
# select reg set
daKD_rank <- make_degrank(DaKD_deg, mode='log2fc')
# GSEA
daKD_GSEgo <- gseGO(geneList=daKD_rank,
keyType = 'SYMBOL',
OrgDb=org.Dm.eg.db,
minGSSize = 15,
maxGSSize = 1000,
pvalueCutoff = 0.05,
eps = 0,
verbose=F)
# get the logP values instead of padj
daKD_GSEgo_log <- daKD_GSEgo
daKD_GSEgo_log@result$p.adjust <- -log10(daKD_GSEgo_log@result$p.adjust) # use @slot and $column to assign
# to subset the data to the highest NES values
#daKD_GSEgo_log@result <- subset(daKD_GSEgo_log@result, abs(NES)>1.8)
# to subset the data to the lowest p-values:
number_of_terms <- 30
cutoff <- rev(rev(sort(daKD_GSEgo_log$p.adjust))[1:number_of_terms])[1]
daKD_GSEgo_log@result <- subset(daKD_GSEgo_log@result, p.adjust>=cutoff)
# to control colour of GO terms according to enrichment or depletion
d <- ifelse(sort(daKD_GSEgo_log$NES) > 0, cet_pal(2, name='cbd1')[2], cet_pal(2, name='cbd1')[1])
# plot
gsep <- dotplot(daKD_GSEgo_log,
x='NES',
showCategory=number_of_terms,
label_format=60,
) +
ggtitle("Enriched/depleted GO terms in *esg > da^RNAi^*") +
scale_colour_gradient(low = cet_pal(3, name='d2')[2],
high = cet_pal(3, name='d2')[3]) +
labs(color = '-log~10~(*p*)') +
theme(plot.title = element_markdown(),
plot.title.position = 'panel',
legend.title = element_markdown(),
panel.grid = element_blank(),
axis.text.y = element_text(size=9, colour=d))
suppressWarnings(print(gsep))
For daughterless overexpression:
# using `clusterProfiler` and `enrichplot`
# select reg set
daOE_rank <- make_degrank(DaOE_deg, mode='log2fc')
# GSEA
daOE_GSEgo <- gseGO(geneList=daOE_rank,
keyType = 'SYMBOL',
OrgDb=org.Dm.eg.db,
minGSSize = 15,
maxGSSize = 1000,
pvalueCutoff = 0.05,
eps = 0,
verbose=F)
# get the logP values instead of padj
daOE_GSEgo_log <- daOE_GSEgo
daOE_GSEgo_log@result$p.adjust <- -log10(daOE_GSEgo_log@result$p.adjust) # use @slot and $column to assign
# to subset the data to the highest NES values
#daOE_GSEgo_log@result <- subset(daOE_GSEgo_log@result, abs(NES)>1.8)
# to subset the data to the lowest p-values:
number_of_terms <- 30
cutoff <- rev(rev(sort(daOE_GSEgo_log$p.adjust))[1:number_of_terms])[1]
daOE_GSEgo_log@result <- subset(daOE_GSEgo_log@result, p.adjust>=cutoff)
# to control colour of GO terms according to enrichment or depletion
d <- ifelse(sort(daOE_GSEgo_log$NES) > 0, cet_pal(2, name='cbd1')[2], cet_pal(2, name='cbd1')[1])
# plot
gsep <- dotplot(daOE_GSEgo_log,
x='NES',
showCategory=number_of_terms,
label_format=60,
) +
ggtitle("Enriched/depleted GO terms in *esg > da*") +
scale_colour_gradient(low = cet_pal(3, name='d2')[2],
high = cet_pal(3, name='d2')[3]) +
labs(color = '-log~10~(*p*)') +
theme(plot.title = element_markdown(),
plot.title.position = 'panel',
legend.title = element_markdown(),
panel.grid = element_blank(),
axis.text.y = element_text(size=9, colour=d))
suppressWarnings(print(gsep))
For da:da overexpression:
# using `clusterProfiler` and `enrichplot`
# select reg set
dadaOE_rank <- make_degrank(DaDaOE_deg, mode='log2fc')
# GSEA
dadaOE_GSEgo <- gseGO(geneList=dadaOE_rank,
keyType = 'SYMBOL',
OrgDb=org.Dm.eg.db,
minGSSize = 15,
maxGSSize = 1000,
pvalueCutoff = 0.05,
eps = 0,
verbose=F)
# get the logP values instead of padj
dadaOE_GSEgo_log <- dadaOE_GSEgo
dadaOE_GSEgo_log@result$p.adjust <- -log10(dadaOE_GSEgo_log@result$p.adjust) # use @slot and $column to assign
# to subset the data to the highest NES values
#dadaOE_GSEgo_log@result <- subset(dadaOE_GSEgo_log@result, abs(NES)>1.8)
# to subset the data to the lowest p-values:
number_of_terms <- 30
cutoff <- rev(rev(sort(dadaOE_GSEgo_log$p.adjust))[1:number_of_terms])[1]
dadaOE_GSEgo_log@result <- subset(dadaOE_GSEgo_log@result, p.adjust>=cutoff)
# to control colour of GO terms according to enrichment or depletion
d <- ifelse(sort(dadaOE_GSEgo_log$NES) > 0,
cet_pal(2, name='cbd1')[2], cet_pal(2, name='cbd1')[1])
# plot
gsep <- dotplot(dadaOE_GSEgo_log,
x='NES',
showCategory=number_of_terms,
label_format=50,
) +
ggtitle("Enriched/depleted GO terms in *esg > da:da*") +
scale_colour_gradient(low = cet_pal(3, name='d2')[2],
high = cet_pal(3, name='d2')[3]) +
labs(color = '-log~10~(*p*)') +
#coord_fixed(ratio=0.8) +
force_panelsizes(rows = unit(3.5,'in'), cols = unit(4,'in')) +
theme(plot.title = element_markdown(),
plot.title.position = 'panel',
legend.title = element_markdown(),
panel.grid = element_blank(),
axis.text.y = element_text(size=7, lineheight=0.7, colour=d)
)
#gsep$coordinates$ratio <- 0.01
suppressWarnings(print(gsep))
For scute overexpression:
# using `clusterProfiler` and `enrichplot`
# select reg set
scOE_rank <- make_degrank(ScOE_deg, mode='log2fc')
# GSEA
scOE_GSEgo <- gseGO(geneList=scOE_rank,
keyType = 'SYMBOL',
OrgDb=org.Dm.eg.db,
minGSSize = 15,
maxGSSize = 1000,
pvalueCutoff = 0.05,
nPermSimple = 1000000,
eps = 0,
verbose=F)
# get the logP values instead of padj
scOE_GSEgo_log <- scOE_GSEgo
scOE_GSEgo_log@result$p.adjust <- -log10(scOE_GSEgo_log@result$p.adjust) # use @slot and $column to assign
# to subset the data to the highest NES values
#scOE_GSEgo_log@result <- subset(scOE_GSEgo_log@result, abs(NES)>1.8)
# to subset the data to the lowest p-values:
number_of_terms <- 30
cutoff <- rev(rev(sort(scOE_GSEgo_log$p.adjust))[1:number_of_terms])[1]
scOE_GSEgo_log@result <- subset(scOE_GSEgo_log@result, p.adjust>=cutoff)
# to control colour of GO terms according to enrichment or depletion
d <- ifelse(sort(scOE_GSEgo_log$NES) > 0,
cet_pal(2, name='cbd1')[2], cet_pal(2, name='cbd1')[1])
# plot
gsep <- dotplot(scOE_GSEgo_log,
x='NES',
showCategory=number_of_terms,
label_format=60
) +
ggtitle("Enriched/depleted GO terms in *esg > scute*") +
scale_colour_gradient(low = cet_pal(3, name='d2')[2],
high = cet_pal(3, name='d2')[3]) +
labs(color = '-log~10~(*p*)') +
theme(plot.title = element_markdown(),
plot.title.position = 'panel',
legend.title = element_markdown(),
panel.grid = element_blank(),
axis.text.y = element_text(size=9, colour=d))
suppressWarnings(print(gsep))
I think that the signals are not strong enough to give anything meaningful. It may be better to use more ‘focused’ gene sets.